import logging

from scrapy import signals
from puremysql import PureMysql
from datetime import datetime


class SpiderItemCountExtension(object):
    # 设置为数据库链接url eg: mysql://root:123456@127.0.0.1:3306/mydata
    ITEM_LOG_DATABASE_URL = 'mysql://root:123456@127.0.0.1:3306/spiderpro'

    # 设置数据表
    ITEM_LOG_TABLE = "log_spider"

    @classmethod
    def from_crawler(cls, crawler):
        ext = cls()
        crawler.signals.connect(ext.spider_closed, signal=signals.spider_closed)
        return ext

    def spider_closed(self, spider, reason):
        stats = spider.crawler.stats.get_stats()
        scraped_count = stats.get("item_scraped_count", 0)
        dropped_count = stats.get("item_dropped_count", 0)

        log_error = stats.get("log_count/ERROR", 0)
        start_time = stats.get("start_time")
        finish_time = stats.get("finish_time")
        duration = (finish_time - start_time).seconds

        count = scraped_count + dropped_count

        logging.debug("*" * 50)
        logging.debug("* {}".format(spider.name))
        logging.debug("* item count: {}".format(count))
        logging.debug("*" * 50)

        item = {
            "spider_name": spider.name,
            "item_count": count,
            "duration": duration,
            "log_error": log_error,
            "create_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        mysql = PureMysql(db_url=self.ITEM_LOG_DATABASE_URL)
        table = mysql.table(self.ITEM_LOG_TABLE)
        table.insert(item)
        mysql.close()